%matplotlib inline
import networkx as nx
from decorator import decorator
from networkx.utils import create_random_state, create_py_random_state
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
# Remove scientific notations and display numbers with 2 decimal points instead
pd.options.display.float_format = '{:,.2f}'.format
# Update the default background style of the plots
sns.set_style(style='darkgrid')
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly
import plotly.express as px
init_notebook_mode()
import warnings
warnings.filterwarnings('ignore')
import plotly.offline as pyo
import plotly.graph_objs as go
An error occurred. ValueError: Please install Node.js and npm before continuing installation. You may be able to install Node.js from your package manager, from conda, or directly from the Node.js website (https://nodejs.org). See the log file for details: C:\Users\Freddy\AppData\Local\Temp\jupyterlab-debug-4dgzyxq_.log
os.listdir("raw_data_books/raw_data_books/")
['book1.csv', 'book2.csv', 'book3.csv', 'book4.csv', 'book5.csv']
book1 = pd.read_csv("raw_data_books/raw_data_books/book1.csv")
book1.head()
| | Person 1 | Person 2 | Type | weight | book |
|---|---|---|---|---|---|
| 0 | Addam-Marbrand | Jaime-Lannister | Undirected | 3 | 1 |
| 1 | Addam-Marbrand | Tywin-Lannister | Undirected | 6 | 1 |
| 2 | Aegon-I-Targaryen | Daenerys-Targaryen | Undirected | 5 | 1 |
| 3 | Aegon-I-Targaryen | Eddard-Stark | Undirected | 4 | 1 |
| 4 | Aemon-Targaryen-(Maester-Aemon) | Alliser-Thorne | Undirected | 4 | 1 |
# Load the remaining four books; book1 was read earlier in the notebook.
book2 = pd.read_csv("raw_data_books/raw_data_books/book2.csv")
book3 = pd.read_csv("raw_data_books/raw_data_books/book3.csv")
book4 = pd.read_csv("raw_data_books/raw_data_books/book4.csv")
book5 = pd.read_csv("raw_data_books/raw_data_books/book5.csv")
books = [book1, book2, book3, book4, book5]

# Stack all edge lists with a single concat. Concatenating inside a loop
# re-copies everything accumulated so far on every iteration, and seeding the
# loop with an empty DataFrame triggers a FutureWarning on recent pandas.
books_combined = pd.concat(books)

# Sum the interaction weight of each pair over all books.
# NOTE(review): pairs are grouped exactly as listed, so a pair stored in the
# opposite column order in another file would stay a separate row - confirm
# the CSVs always use one ordering.
books_combined = books_combined.groupby(["Person 2", "Person 1"], as_index=False)["weight"].sum()
books_combined.describe()
| | weight |
|---|---|
| count | 2,823.00 |
| mean | 11.56 |
| std | 19.98 |
| min | 3.00 |
| 25% | 3.00 |
| 50% | 5.00 |
| 75% | 11.00 |
| max | 334.00 |
books_combined[books_combined["weight"] == 334]
| | Person 2 | Person 1 | weight |
|---|---|---|---|
| 1570 | Robert-Baratheon | Eddard-Stark | 334 |
def _book_graph(edges):
    """Build an undirected graph from an edge table.

    ``edges`` must have "Person 1" and "Person 2" columns (the two endpoints)
    and a "weight" column, which is stored as the ``weight`` edge attribute.
    A fresh ``nx.Graph()`` is created on every call so graphs never share state.
    """
    return nx.from_pandas_edgelist(
        edges, "Person 1", "Person 2", edge_attr="weight", create_using=nx.Graph()
    )


# One graph per book ...
G1 = _book_graph(book1)
G2 = _book_graph(book2)
G3 = _book_graph(book3)
G4 = _book_graph(book4)
G5 = _book_graph(book5)
# ... and one for the whole series, built from the summed weights.
G = _book_graph(books_combined)

# nx.info() was removed in NetworkX 3.0; str(G) yields the same one-line
# "Graph with N nodes and M edges" summary.
str(G)
'Graph with 796 nodes and 2823 edges'
def numUniqueConnec(G):
    """Rank the nodes of ``G`` by degree, highest first.

    Parameters
    ----------
    G : graph-like
        Any object whose ``degree()`` yields ``(node, degree)`` pairs, such as
        a ``networkx.Graph``.

    Returns
    -------
    pandas.DataFrame
        Columns "Character" and "NumberOfUniqueHCPConnections", one row per
        node. Nodes with equal degree keep the order in which ``G.degree()``
        produced them (``sorted`` is stable).
    """
    ranked = sorted(G.degree(), key=lambda pair: pair[1], reverse=True)
    # Naming the columns in the constructor (rather than assigning ``.columns``
    # afterwards) keeps the two-column schema for a graph with no nodes, where
    # the later assignment used to raise a length-mismatch ValueError.
    return pd.DataFrame(ranked, columns=["Character", "NumberOfUniqueHCPConnections"])
numUniqueConnec(G)
| | Character | NumberOfUniqueHCPConnections |
|---|---|---|
| 0 | Tyrion-Lannister | 122 |
| 1 | Jon-Snow | 114 |
| 2 | Jaime-Lannister | 101 |
| 3 | Cersei-Lannister | 97 |
| 4 | Stannis-Baratheon | 89 |
| ... | ... | ... |
| 791 | Wynton-Stout | 1 |
| 792 | Bael-the-Bard | 1 |
| 793 | Yorko-Terys | 1 |
| 794 | Yurkhaz-zo-Yunzak | 1 |
| 795 | Zei | 1 |
796 rows × 2 columns
def deg_central(G):
    """Return the degree centrality of every node in ``G``, highest first.

    Parameters
    ----------
    G : networkx graph
        Graph passed to ``nx.degree_centrality``.

    Returns
    -------
    pandas.DataFrame
        Columns "Character" and "Degree Centrality", one row per node. Ties
        keep the graph's node order because ``sorted`` is stable.
    """
    centrality = nx.degree_centrality(G)
    ranked = sorted(centrality.items(), key=lambda item: item[1], reverse=True)
    # Columns are named in the constructor so that an empty result still has
    # the expected schema instead of failing on a later ``.columns`` assignment.
    return pd.DataFrame(ranked, columns=["Character", "Degree Centrality"])
deg_centrality_sort = deg_central(G)
deg_central(G)
| | Character | Degree Centrality |
|---|---|---|
| 0 | Tyrion-Lannister | 0.15 |
| 1 | Jon-Snow | 0.14 |
| 2 | Jaime-Lannister | 0.13 |
| 3 | Cersei-Lannister | 0.12 |
| 4 | Stannis-Baratheon | 0.11 |
| ... | ... | ... |
| 791 | Wynton-Stout | 0.00 |
| 792 | Bael-the-Bard | 0.00 |
| 793 | Yorko-Terys | 0.00 |
| 794 | Yurkhaz-zo-Yunzak | 0.00 |
| 795 | Zei | 0.00 |
796 rows × 2 columns
def eigen_central(G):
    """Return the eigenvector centrality of every node in ``G``, highest first.

    The "weight" edge attribute is passed to ``nx.eigenvector_centrality`` as
    the edge weight. The result is a DataFrame with the columns "Character"
    and "EigenVector Centrality".
    """
    scores = nx.eigenvector_centrality(G, weight="weight")
    table = pd.DataFrame.from_dict(
        sorted(scores.items(), key=lambda entry: entry[1], reverse=True)
    )
    table.columns = ["Character", "EigenVector Centrality"]
    return table
eigen_central(G)
| | Character | EigenVector Centrality |
|---|---|---|
| 0 | Tyrion-Lannister | 0.38 |
| 1 | Cersei-Lannister | 0.36 |
| 2 | Joffrey-Baratheon | 0.34 |
| 3 | Robert-Baratheon | 0.28 |
| 4 | Eddard-Stark | 0.28 |
| ... | ... | ... |
| 791 | Simon-Toyne | 0.00 |
| 792 | Hugh-Hungerford | 0.00 |
| 793 | Murch | 0.00 |
| 794 | Torwold-Browntooth | 0.00 |
| 795 | Gormon-Tyrell | 0.00 |
796 rows × 2 columns
def betweenness_central(G):
    """Return the betweenness centrality of every node in ``G``, highest first.

    Parameters
    ----------
    G : networkx graph
        Graph passed to ``nx.betweenness_centrality`` with ``weight="weight"``.

    Returns
    -------
    pandas.DataFrame
        Columns "Character" and "Betweenness Centrality", one row per node.
        Ties keep the graph's node order because ``sorted`` is stable.
    """
    # NOTE(review): NetworkX interprets ``weight`` here as a distance when it
    # computes shortest paths. If the "weight" attribute is an interaction
    # count (bigger = closer relationship), strongly connected pairs are being
    # treated as far apart - confirm this is intended before relying on the
    # ranking.
    centrality = nx.betweenness_centrality(G, weight="weight")
    ranked = sorted(centrality.items(), key=lambda item: item[1], reverse=True)
    # Columns are named in the constructor so that an empty result still has
    # the expected schema instead of failing on a later ``.columns`` assignment.
    return pd.DataFrame(ranked, columns=["Character", "Betweenness Centrality"])
betweenness_central(G)
| | Character | Betweenness Centrality |
|---|---|---|
| 0 | Jon-Snow | 0.13 |
| 1 | Theon-Greyjoy | 0.12 |
| 2 | Jaime-Lannister | 0.12 |
| 3 | Daenerys-Targaryen | 0.09 |
| 4 | Stannis-Baratheon | 0.09 |
| ... | ... | ... |
| 791 | Yandry | 0.00 |
| 792 | Bael-the-Bard | 0.00 |
| 793 | Yorko-Terys | 0.00 |
| 794 | Yurkhaz-zo-Yunzak | 0.00 |
| 795 | Zei | 0.00 |
796 rows × 2 columns
def draw_plotly_network_graph(Graph_obj, filter=None, filter_nodesbydegree=None, seed=None):
    """Display ``Graph_obj`` as an interactive Plotly network diagram.

    Nodes are positioned with ``nx.spring_layout``. Each node is coloured by,
    and shows on hover, its degree in the *unfiltered* ``Graph_obj``.

    Parameters
    ----------
    Graph_obj : networkx graph
        Graph to draw. A copy is filtered and drawn; the argument itself is
        not modified.
    filter : object, optional
        Any value other than ``None`` switches node filtering on. (The name
        shadows the builtin but is kept because callers pass it by keyword.)
    filter_nodesbydegree : int, optional
        With filtering on, keep only this many highest-degree nodes of
        ``Graph_obj``. ``None`` keeps every node.
    seed : optional
        Forwarded to ``nx.spring_layout``. The default ``None`` keeps the
        previous behaviour of an unseeded layout that changes between runs.

    Returns
    -------
    None
        The figure is shown with ``pyo.iplot``.
    """
    graph = Graph_obj.copy()
    # Snapshot the degrees before any node is removed, so colours and hover
    # text keep reporting the connection count in the full graph.
    degree_of = dict(graph.degree())

    if filter is not None:
        # Rank the nodes of *this* graph. The previous code sliced the global
        # ``deg_centrality_sort`` table, which is computed from the combined
        # graph, so a per-book plot showed the series-wide top characters
        # rather than that book's. Sorting by degree gives the same order as
        # sorting by degree centrality (degree scaled by a constant).
        ranked = sorted(degree_of, key=degree_of.get, reverse=True)
        keep = set(ranked[:filter_nodesbydegree])
        graph.remove_nodes_from([node for node in graph.nodes if node not in keep])

    pos = nx.spring_layout(graph, seed=seed)

    # Edges: one trace made of disconnected segments; ``None`` breaks the line
    # between consecutive edges. Coordinates are collected in plain lists and
    # handed to Plotly once instead of re-assigning a growing tuple per edge.
    edge_x, edge_y = [], []
    for source, target in graph.edges():
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]

    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines')

    # Nodes: position, colour value and hover text in matching order.
    nodes = list(graph.nodes())
    node_x = [pos[node][0] for node in nodes]
    node_y = [pos[node][1] for node in nodes]
    node_degree = [degree_of[node] for node in nodes]
    node_text = [f"{node}<br /># of connections: {degree_of[node]}" for node in nodes]

    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        text=node_text,
        mode='markers',
        hoverinfo='text',
        marker=dict(
            showscale=True,
            colorscale='RdBu',
            reversescale=True,
            color=node_degree,
            size=15,
            colorbar=dict(
                thickness=10,
                # ``title.side`` replaces the removed ``titleside`` attribute.
                title=dict(text='Node Connections', side='right'),
                xanchor='left'
            ),
            line=dict(width=0)))

    # Create a network graph
    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            # ``title.font`` replaces the removed ``titlefont`` attribute.
            title=dict(text='<br>GOT network connections', font=dict(size=20)),
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=0),
            annotations=[dict(
                text="",
                showarrow=False,
                xref="paper", yref="paper")],
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
    pyo.iplot(fig)
draw_plotly_network_graph(Graph_obj = G, filter = None, filter_nodesbydegree = None)
draw_plotly_network_graph(Graph_obj = G, filter = "Yes", filter_nodesbydegree = 50)
draw_plotly_network_graph(Graph_obj = G1, filter = "Yes", filter_nodesbydegree = 50)
#Top 50 characters network in Book 1
deg_central(G1)[:20]
| | Character | Degree Centrality |
|---|---|---|
| 0 | Eddard-Stark | 0.35 |
| 1 | Robert-Baratheon | 0.27 |
| 2 | Tyrion-Lannister | 0.25 |
| 3 | Catelyn-Stark | 0.23 |
| 4 | Jon-Snow | 0.20 |
| 5 | Robb-Stark | 0.19 |
| 6 | Sansa-Stark | 0.19 |
| 7 | Bran-Stark | 0.17 |
| 8 | Cersei-Lannister | 0.16 |
| 9 | Joffrey-Baratheon | 0.16 |
| 10 | Jaime-Lannister | 0.16 |
| 11 | Arya-Stark | 0.15 |
| 12 | Petyr-Baelish | 0.14 |
| 13 | Tywin-Lannister | 0.12 |
| 14 | Daenerys-Targaryen | 0.11 |
| 15 | Jory-Cassel | 0.11 |
| 16 | Drogo | 0.10 |
| 17 | Rodrik-Cassel | 0.10 |
| 18 | Renly-Baratheon | 0.10 |
| 19 | Luwin | 0.10 |
draw_plotly_network_graph(Graph_obj = G2, filter = "Yes", filter_nodesbydegree = 50)
deg_central(G2)[:20]
| | Character | Degree Centrality |
|---|---|---|
| 0 | Tyrion-Lannister | 0.21 |
| 1 | Joffrey-Baratheon | 0.18 |
| 2 | Cersei-Lannister | 0.17 |
| 3 | Arya-Stark | 0.16 |
| 4 | Stannis-Baratheon | 0.14 |
| 5 | Robb-Stark | 0.14 |
| 6 | Catelyn-Stark | 0.13 |
| 7 | Theon-Greyjoy | 0.12 |
| 8 | Renly-Baratheon | 0.12 |
| 9 | Bran-Stark | 0.12 |
| 10 | Jon-Snow | 0.11 |
| 11 | Sansa-Stark | 0.10 |
| 12 | Robert-Baratheon | 0.10 |
| 13 | Eddard-Stark | 0.09 |
| 14 | Jaime-Lannister | 0.08 |
| 15 | Varys | 0.08 |
| 16 | Daenerys-Targaryen | 0.07 |
| 17 | Amory-Lorch | 0.07 |
| 18 | Sandor-Clegane | 0.07 |
| 19 | Tywin-Lannister | 0.07 |
draw_plotly_network_graph(Graph_obj = G3, filter = "Yes", filter_nodesbydegree = 50)
deg_central(G3)[:20]
| | Character | Degree Centrality |
|---|---|---|
| 0 | Tyrion-Lannister | 0.20 |
| 1 | Jon-Snow | 0.17 |
| 2 | Joffrey-Baratheon | 0.17 |
| 3 | Robb-Stark | 0.16 |
| 4 | Sansa-Stark | 0.16 |
| 5 | Jaime-Lannister | 0.15 |
| 6 | Catelyn-Stark | 0.13 |
| 7 | Cersei-Lannister | 0.13 |
| 8 | Arya-Stark | 0.12 |
| 9 | Stannis-Baratheon | 0.10 |
| 10 | Samwell-Tarly | 0.10 |
| 11 | Tywin-Lannister | 0.10 |
| 12 | Robert-Baratheon | 0.09 |
| 13 | Daenerys-Targaryen | 0.08 |
| 14 | Mance-Rayder | 0.07 |
| 15 | Gregor-Clegane | 0.07 |
| 16 | Sandor-Clegane | 0.07 |
| 17 | Aemon-Targaryen-(Maester-Aemon) | 0.06 |
| 18 | Jeor-Mormont | 0.06 |
| 19 | Davos-Seaworth | 0.06 |
draw_plotly_network_graph(Graph_obj = G4, filter = "Yes", filter_nodesbydegree = 50)
deg_central(G4)[:20]
| | Character | Degree Centrality |
|---|---|---|
| 0 | Jaime-Lannister | 0.23 |
| 1 | Cersei-Lannister | 0.22 |
| 2 | Brienne-of-Tarth | 0.10 |
| 3 | Tyrion-Lannister | 0.10 |
| 4 | Margaery-Tyrell | 0.09 |
| 5 | Sansa-Stark | 0.09 |
| 6 | Tommen-Baratheon | 0.09 |
| 7 | Samwell-Tarly | 0.07 |
| 8 | Stannis-Baratheon | 0.07 |
| 9 | Petyr-Baelish | 0.07 |
| 10 | Victarion-Greyjoy | 0.06 |
| 11 | Arianne-Martell | 0.06 |
| 12 | Tywin-Lannister | 0.06 |
| 13 | Arya-Stark | 0.06 |
| 14 | Osmund-Kettleblack | 0.05 |
| 15 | Pycelle | 0.05 |
| 16 | Robert-Arryn | 0.05 |
| 17 | Aeron-Greyjoy | 0.05 |
| 18 | Qyburn | 0.05 |
| 19 | Robert-Baratheon | 0.05 |
betweenness_central(G4)[:20]
| | Character | Betweenness Centrality |
|---|---|---|
| 0 | Stannis-Baratheon | 0.24 |
| 1 | Balon-Greyjoy | 0.19 |
| 2 | Jaime-Lannister | 0.18 |
| 3 | Baelor-Blacktyde | 0.17 |
| 4 | Cersei-Lannister | 0.17 |
| 5 | Tyrion-Lannister | 0.17 |
| 6 | Sansa-Stark | 0.16 |
| 7 | Arya-Stark | 0.12 |
| 8 | Samwell-Tarly | 0.12 |
| 9 | Tywin-Lannister | 0.10 |
| 10 | Myrcella-Baratheon | 0.09 |
| 11 | Sandor-Clegane | 0.09 |
| 12 | Brienne-of-Tarth | 0.09 |
| 13 | Doran-Martell | 0.07 |
| 14 | Victarion-Greyjoy | 0.07 |
| 15 | Catelyn-Stark | 0.06 |
| 16 | Aurane-Waters | 0.06 |
| 17 | Tommen-Baratheon | 0.05 |
| 18 | Randyll-Tarly | 0.05 |
| 19 | Leo-Tyrell | 0.05 |
draw_plotly_network_graph(Graph_obj = G5, filter = "Yes", filter_nodesbydegree = 50)
deg_central(G5)[:20]
| | Character | Degree Centrality |
|---|---|---|
| 0 | Jon-Snow | 0.20 |
| 1 | Daenerys-Targaryen | 0.18 |
| 2 | Stannis-Baratheon | 0.15 |
| 3 | Tyrion-Lannister | 0.10 |
| 4 | Theon-Greyjoy | 0.10 |
| 5 | Cersei-Lannister | 0.09 |
| 6 | Barristan-Selmy | 0.08 |
| 7 | Hizdahr-zo-Loraq | 0.07 |
| 8 | Asha-Greyjoy | 0.06 |
| 9 | Melisandre | 0.05 |
| 10 | Jon-Connington | 0.05 |
| 11 | Quentyn-Martell | 0.05 |
| 12 | Mance-Rayder | 0.05 |
| 13 | Ramsay-Snow | 0.05 |
| 14 | Aegon-Targaryen-(son-of-Rhaegar) | 0.05 |
| 15 | Robert-Baratheon | 0.05 |
| 16 | Daario-Naharis | 0.05 |
| 17 | Doran-Martell | 0.05 |
| 18 | Selyse-Florent | 0.05 |
| 19 | Wyman-Manderly | 0.04 |
betweenness_central(G5)[:20]
| | Character | Betweenness Centrality |
|---|---|---|
| 0 | Stannis-Baratheon | 0.36 |
| 1 | Daenerys-Targaryen | 0.25 |
| 2 | Jon-Snow | 0.21 |
| 3 | Robert-Baratheon | 0.20 |
| 4 | Asha-Greyjoy | 0.17 |
| 5 | Tyrion-Lannister | 0.16 |
| 6 | Cersei-Lannister | 0.14 |
| 7 | Godry-Farring | 0.10 |
| 8 | Tywin-Lannister | 0.10 |
| 9 | Barristan-Selmy | 0.08 |
| 10 | Eddard-Stark | 0.08 |
| 11 | Theon-Greyjoy | 0.07 |
| 12 | Doran-Martell | 0.07 |
| 13 | Axell-Florent | 0.07 |
| 14 | Wyman-Manderly | 0.06 |
| 15 | Bowen-Marsh | 0.05 |
| 16 | Aegon-Targaryen-(son-of-Rhaegar) | 0.05 |
| 17 | Mance-Rayder | 0.05 |
| 18 | Bran-Stark | 0.05 |
| 19 | Theomore | 0.04 |
# Degree centrality of every character, computed separately for each book
Books_Graph = [G1, G2, G3, G4, G5]
evol = list(map(nx.degree_centrality, Books_Graph))

# One row per book, one column per character; shift the index so that the
# rows are numbered from 1 like the books
degree_evol_df = pd.DataFrame.from_records(evol)
degree_evol_df.index += 1

# Line chart of how the degree centrality of a few important characters
# changes from book to book
tracked_characters = [
    'Eddard-Stark',
    'Tyrion-Lannister',
    'Jon-Snow',
    'Jaime-Lannister',
    'Cersei-Lannister',
    'Sansa-Stark',
    'Arya-Stark',
]
fig = px.line(
    degree_evol_df[tracked_characters],
    title="Evolution of Different Characters",
    width=900,
    height=600,
)
fig.update_layout(
    xaxis_title='Book Number',
    yaxis_title='Degree Centrality Score',
    legend={'title_text': ''},
)
fig.show()
import community as community_louvain
import matplotlib.cm as cm
import colorlover as cl
# Louvain community detection on the combined graph; the fixed random_state
# makes the assignment reproducible
partition = community_louvain.best_partition(G, random_state=12345)

# Turn the {character: community id} mapping into a two-column table
partition_df = (
    pd.DataFrame([partition])
    .transpose()
    .reset_index()
    .set_axis(["Character", "Community"], axis=1)
)
partition_df
| | Character | Community |
|---|---|---|
| 0 | Aegon-V-Targaryen | 0 |
| 1 | Aemon-Targaryen-(Maester-Aemon) | 0 |
| 2 | Alleras | 1 |
| 3 | Alliser-Thorne | 0 |
| 4 | Andrey-Dalt | 2 |
| ... | ... | ... |
| 791 | Yorko-Terys | 7 |
| 792 | Ysilla | 8 |
| 793 | Yurkhaz-zo-Yunzak | 10 |
| 794 | Zei | 0 |
| 795 | Zollo | 5 |
796 rows × 2 columns
partition_df["Community"].value_counts().sort_values(ascending = False)
8     136
0     114
9     113
5     110
10     89
6      70
12     68
7      51
2      25
1      11
13      3
11      2
3       2
4       2
Name: Community, dtype: int64
# Qualitative "Paired" palette taken from colorlover's '12' scale group.
# scatter_nodes picks colors[community_id % len(colors)], so community ids at
# or beyond the palette length wrap around and reuse earlier colours.
colors = cl.scales['12']['qual']['Paired']
def scatter_nodes(G, pos, labels=None, color='rgb(152, 0, 0)', size=8, opacity=1):
    """Build the Plotly marker trace for the nodes of ``G``, coloured by community.

    Parameters
    ----------
    G : networkx graph
        Every node must carry a ``'pos'`` attribute holding its ``(x, y)``
        coordinates and must be a key of the module-level ``partition``
        mapping (node -> community id).
    pos : dict
        Node positions. Accepted for interface compatibility; coordinates are
        read from the ``'pos'`` node attribute instead.
    labels : sequence, optional
        Hover texts, one per node in ``G.nodes()`` order. Defaults to the node
        names themselves.
    color : str
        Unused: each node is coloured from the module-level ``colors`` palette
        according to its community. Kept so existing calls stay valid.
    size : int
        Unused: markers are always drawn at size 15. Honouring the default of
        8 would change every existing plot, so the parameter is left inert.
    opacity : float
        Marker opacity between 0 and 1.

    Returns
    -------
    plotly.graph_objs.Scatter
    """
    nodes = list(G.nodes())

    # Collect everything in plain lists and hand it to Plotly once, instead of
    # re-assigning a growing tuple to the trace for every node.
    xs, ys = [], []
    for nd in nodes:
        x, y = G.nodes[nd]['pos']
        xs.append(x)
        ys.append(y)

    # Community id selects the colour; the modulo wraps ids that exceed the
    # palette length. A separate name is used so the ``color`` parameter is no
    # longer overwritten.
    community_colors = [colors[partition[nd] % len(colors)] for nd in nodes]

    hover_text = nodes if labels is None else list(labels)

    return go.Scatter(
        x=xs,
        y=ys,
        text=hover_text,
        mode='markers',
        hoverinfo='text',
        marker=dict(
            showscale=False,
            colorscale='RdBu',
            reversescale=True,
            color=community_colors,
            size=15,
            opacity=opacity,
            colorbar=dict(
                thickness=10,
                xanchor='left',
                # ``title.side`` replaces the removed ``titleside`` attribute.
                title=dict(side='right')
            ),
            line=dict(width=0)))
def scatter_edges(G, pos, line_color='#a3a3c2', line_width=1, opacity=.2):
    """Build the Plotly line trace for the edges of ``G``.

    Parameters
    ----------
    G : networkx graph
        Every node must carry a ``'pos'`` attribute holding its ``(x, y)``
        coordinates.
    pos : dict
        Node positions. Accepted for interface compatibility; coordinates are
        read from the ``'pos'`` node attribute instead.
    line_color : str or None
        Edge colour. ``None`` leaves the colour to Plotly's default.
    line_width : number
        Edge line width.
    opacity : float
        Unused; kept so existing calls stay valid.

    Returns
    -------
    plotly.graph_objs.Scatter
        A single ``'lines'`` trace with hover disabled.
    """
    # All edges share one trace; a ``None`` after each pair of endpoints breaks
    # the line so consecutive edges are not joined. Coordinates are gathered in
    # lists first, because appending to the trace's tuples edge by edge copies
    # (and re-validates) the whole sequence every time.
    xs, ys = [], []
    for source, target in G.edges():
        x0, y0 = G.nodes[source]['pos']
        x1, y1 = G.nodes[target]['pos']
        xs += [x0, x1, None]
        ys += [y0, y1, None]

    line = dict(width=line_width)
    if line_color is not None:
        line['color'] = line_color

    return go.Scatter(x=xs, y=ys, mode='lines', hoverinfo='none', line=line)
def visualize_community(Graph, filter="Yes", filter_nodes=100):
    """Display ``Graph`` with its nodes coloured by detected community.

    Parameters
    ----------
    Graph : networkx graph
        Graph to draw. A copy is filtered and drawn; the argument itself is
        not modified. Node colours are assigned by ``scatter_nodes``, which
        looks every node up in the module-level ``partition`` mapping, so the
        nodes of ``Graph`` must be present there.
    filter : object, optional
        Any value other than ``None`` switches node filtering on. (The name
        shadows the builtin but is kept because callers pass it by keyword.)
    filter_nodes : int, optional
        With filtering on, keep only this many highest-degree nodes of
        ``Graph``. ``None`` keeps every node.

    Returns
    -------
    None
        The figure is shown with ``iplot``.
    """
    # Work on the graph that was passed in. The previous code copied the
    # global ``G`` here, silently ignoring the ``Graph`` argument.
    graph = Graph.copy()

    if filter is not None:
        # Rank the nodes of this graph by degree (same order as degree
        # centrality) instead of slicing the global ``deg_centrality_sort``.
        degree_of = dict(graph.degree())
        ranked = sorted(degree_of, key=degree_of.get, reverse=True)
        keep = set(ranked[:filter_nodes])
        graph.remove_nodes_from([node for node in graph.nodes if node not in keep])

    # Fixed seed so the layout is identical on every run.
    pos = nx.spring_layout(graph, seed=1234567)
    # scatter_edges / scatter_nodes read coordinates from the node attribute.
    for node, xy in pos.items():
        graph.nodes[node]['pos'] = xy

    edge_trace = scatter_edges(graph, pos, line_width=0.25)
    node_trace = scatter_nodes(graph, pos)

    fig = go.Figure(
        data=[edge_trace, node_trace],
        layout=go.Layout(
            # ``title.font`` replaces the removed ``titlefont`` attribute.
            title=dict(text='<br> GOT Community Detection', font=dict(size=20)),
            showlegend=False,
            hovermode='closest',
            margin=dict(b=20, l=5, r=5, t=40),
            annotations=[dict(
                text="",
                showarrow=False,
                xref="paper", yref="paper")],
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
    iplot(fig)
visualize_community(Graph = G, filter = "Yes", filter_nodes = 100)